# Written by Aunoy Poddar July 21st, 2022
## Bootstrap: purl the currently open .Rmd into a plain .R script and open it.
## Only works interactively inside RStudio (rstudioapi).
current_file <- rstudioapi::getActiveDocumentContext()$path
## Escape the dot and anchor at the end so '.Rmd' is matched literally as a
## file extension (the original pattern '.Rmd' let '.' match any character).
output_file <- stringr::str_replace(current_file, '\\.Rmd$', '.R')
knitr::purl(current_file, output = output_file)
file.edit(output_file)
library(Seurat)
library(tictoc)
library(ggplot2)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(tidyverse)
library(gridExtra)
library(png)
library(cowplot)
library(magick)
library(scales)
library(packcircles)
library(ggalt)
## Directory layout for inputs (hand-annotated data) and outputs.
st_dir <- '/home/aunoy/st/arc_profiling/st_analysis'
data_dir <- file.path(st_dir, 'hand_annotated_data', 'rethresholded')
clump_dir <- file.path(st_dir, 'hand_annotated_data', 'clumps')
meta_dir <- file.path(st_dir, 'hand_annotated_data', 'overlay')
output_dir_plot <- file.path(st_dir, 'results', 'plots')
output_dir_tbls <- file.path(st_dir, 'results', 'tables')

## Clump/ROI intersection metadata (csv has no header row).
meta_ntrscts <- read.csv(file.path(clump_dir, 'meta', 'META_ntrsct.csv'),
                         header = FALSE) %>%
  as_tibble()

## Accumulator for sample 408 expression tables, filled by the loop below.
df_408 <- data.frame()
## Load every rethresholded csv for sample 408 into one data frame.
## Sample 164 files are skipped here; they are handled separately below.
for (file_name in list.files(data_dir)) {
  if (grepl('164', file_name)) {
    next
  }
  print(file_name)
  df_to_append <- read.table(file.path(data_dir, file_name), sep = ',',
                             header = TRUE)
  print(df_to_append)
  ## The csv only writes Image.Name on the first ROI row of each image;
  ## fill the blanks downward from the row above.
  while (length(ind <- which(df_to_append$Image.Name == "")) > 0) {
    df_to_append$Image.Name[ind] <- df_to_append$Image.Name[ind - 1]
  }
  colnames(df_to_append) <- toupper(colnames(df_to_append))
  ## Tag rows with their source area = file name minus the extension.
  ## sub() with an escaped, anchored pattern replaces the old
  ## strsplit(file_name, '.csv') call, where '.' matched any character.
  df_to_append <- df_to_append %>%
    mutate(area = sub('\\.csv$', '', file_name))
  ## Keep the column set consistent with what was accumulated so far.
  if (!is_empty(df_408)) {
    df_to_append <- df_to_append %>%
      dplyr::select(colnames(df_408))
  }
  df_408 <- rbind(df_408, df_to_append)
}
[1] "408_CC.csv"
[1] "408_dMS_TC.csv"
[1] "408_MS_CC.csv"
[1] "408_TC.csv"
[1] "408_vMS_TC.csv"
## Normalize the hand-entered image names for sample 408. gsub() is already
## vectorized over character vectors, so the original unlist(lapply(...))
## wrappers are unnecessary; replacements run in the same order as before.
df_408$IMAGE.NAME <- gsub('_Cluster', '', df_408$IMAGE.NAME)
df_408$IMAGE.NAME <- gsub('[*]', '', df_408$IMAGE.NAME)
df_408$IMAGE.NAME <- gsub('X', '', df_408$IMAGE.NAME)
df_408$IMAGE.NAME <- gsub('L2_', 'L2-', df_408$IMAGE.NAME)
df_408$IMAGE.NAME <- gsub('-L2', '_L2', df_408$IMAGE.NAME)
df_408$IMAGE.NAME <- gsub('Tc_12', 'TC_12', df_408$IMAGE.NAME)
## Drop images that are missing coordinate metadata (per original comment).
df_408 <- df_408[!df_408$IMAGE.NAME %in% c('Layer1', 'TC_1', 'TC_18', 'TC_19'), ]
unique(df_408$IMAGE.NAME)
[1] "CC_Cortical1" "CC_Cortical2" "CC_L2-1" "CC_L2-2" "CC_L2-3" "TC_2" "TC_3"
[8] "TC_4" "TC_5" "TC_6" "TC_7" "TC_8" "TC_9" "TC_10"
[15] "CC_4" "CC_5" "CC_6" "CC_7" "CC_8" "CC_9" "CC_10"
[22] "CC_11" "CC_12" "TC_16" "TC_17" "TC_20" "TC_11" "TC_12"
[29] "TC_13" "TC_14" "TC_15"
unique(df_408$IMAGE.NAME)
[1] "CC_Cortical1" "CC_Cortical2" "CC_L2-1" "CC_L2-2" "CC_L2-3" "TC_2" "TC_3"
[8] "TC_4" "TC_5" "TC_6" "TC_7" "TC_8" "TC_9" "TC_10"
[15] "CC_4" "CC_5" "CC_6" "CC_7" "CC_8" "CC_9" "CC_10"
[22] "CC_11" "CC_12" "TC_16" "TC_17" "TC_20" "TC_11" "TC_12"
[29] "TC_13" "TC_14" "TC_15"
## Anatomical ordering of the 408 images for the horizontal-strip layout:
## each image occupies a 35-unit slot along one axis, zero on the other.
images_ordered <- c('TC_20', 'TC_17', 'TC_16', 'TC_15', 'TC_14', 'TC_13', 'TC_12', 'TC_11', 'TC_10', 'TC_9', 'TC_8', 'TC_7', 'TC_6', 'TC_5',
                    'TC_4', 'TC_3', 'TC_2', 'CC_4', 'CC_5', 'CC_6', 'CC_7', 'CC_8', 'CC_9', 'CC_10', 'CC_11', 'CC_12', 'CC_L2-3', 'CC_L2-2', 'CC_L2-1', 'CC_Cortical1', 'CC_Cortical2')
x_horz <- seq_along(images_ordered) * 35  # seq_along() is the safe 1:length()
y_horz <- rep(0, length(images_ordered))
horz_embedding <- data.frame()
## Sentinel values: -1 / NaN flag rows that the loop below never assigns.
df_408$X_horz <- -1
df_408$Y_horz <- -1
df_408$clump <- NaN
clump_header <- '408_'
clump_files <- list.files(clump_dir)
## Pixel size of a source image (coordinates in the overlay csvs are pixels).
IMAGE_SIZE <- 1024
## This is the size of an image in the global coordinate space
IMAGE_LEN <- 25
images <- list.files(meta_dir)
## For each image (in anatomical order) attach clump labels and
## horizontal-strip coordinates to the matching rows of df_408.
for(i in 1:length(images_ordered)){
image_name = images_ordered[i]
print(image_name)
## "CC_4" -> cortex "CC", number "4"; used to find the overlay csv.
split_names = strsplit(image_name, '_')
cortex = toupper(split_names[[1]][1])
number = split_names[[1]][2]
number_csv = paste0('_', number, '.csv')
filename = images[grepl(cortex, images) & grepl(number_csv, images) & grepl('408', images)]
coordinates = read.table(file.path(meta_dir, filename), sep = ',', header = TRUE)
## checked already that lists are equal, missing 1, 18, 19 for now, layer 1 and others
## Get the clumps
filename = clump_files[grepl(clump_header, clump_files) & grepl(paste0(image_name, '_'),clump_files)]
## The clump csv is transposed on disk: after t(), rows are ROIs with
## columns (roi, cluster).
clump_df = as.data.frame(t(read.csv(file.path(clump_dir, filename), header = FALSE)))
colnames(clump_df) = c('roi', 'cluster')
clump_df$roi = as.numeric(clump_df$roi)
## Map per-image 1-based ROI numbers to df_408 row indices by offsetting
## from the image's first row. NOTE(review): assumes each image's rows are
## contiguous and in ROI order in df_408 — confirm against the input csvs.
image_idxs = which(df_408$IMAGE.NAME == image_name)
clump_df$roi_idxs = image_idxs[1] + clump_df$roi - 1
df_408[clump_df$roi_idxs, "clump"] = paste0(clump_header, image_name, '_', clump_df$cluster)
## so this is a little tricky, so need to get it right
## Remember, it is the top right that the coordinate is coming from, but
## the bottom right is the new coordinate space.
## so first when we get the original coordinate space, to set to relative
## of bottom would be the same X, but 1024 - Y
## push out the coordinates for better visualization
#x_repelled <- (512 - coordinates$X_Coordinate_In_pixels)
## Rescale pixel coordinates to IMAGE_LEN units and offset by the image's
## slot; note x_horz/y_horz are deliberately swapped (strip runs along Y).
df_408[df_408$IMAGE.NAME == image_name, 'X_horz'] = (coordinates$X_Coordinate_In_pixels /
IMAGE_SIZE * IMAGE_LEN) + y_horz[i]
df_408[df_408$IMAGE.NAME == image_name, 'Y_horz'] = ((1024-coordinates$Y_Coordinate_In_pixels) /
IMAGE_SIZE * IMAGE_LEN) + x_horz[i]
}
[1] "TC_20"
[1] "TC_17"
[1] "TC_16"
[1] "TC_15"
[1] "TC_14"
[1] "TC_13"
[1] "TC_12"
[1] "TC_11"
[1] "TC_10"
[1] "TC_9"
[1] "TC_8"
[1] "TC_7"
[1] "TC_6"
[1] "TC_5"
[1] "TC_4"
[1] "TC_3"
[1] "TC_2"
[1] "CC_4"
[1] "CC_5"
[1] "CC_6"
[1] "CC_7"
[1] "CC_8"
[1] "CC_9"
[1] "CC_10"
[1] "CC_11"
[1] "CC_12"
[1] "CC_L2-3"
[1] "CC_L2-2"
[1] "CC_L2-1"
[1] "CC_Cortical1"
[1] "CC_Cortical2"
## Build the Seurat object for sample 408: drop metadata columns, transpose
## so features are rows (what CreateSeuratObject expects), then normalize.
rownames(df_408) <- seq_len(nrow(df_408))  # seq_len() is the safe 1:nrow()
jy_408 <- df_408 %>%
  dplyr::select(-c(area, IMAGE.NAME, X_horz, Y_horz, clump)) %>%
  t() %>%
  CreateSeuratObject()
## Log-normalization with a 1e5 scale factor.
jy_408 <- NormalizeData(jy_408, scale.factor = 1e5) ###
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Hard floor: zero out normalized expression below 3 (treated as noise).
normed = GetAssayData(jy_408, slot = 'data')
normed[normed < 3] = 0
## NOTE(review): this loop is currently a no-op — the median is computed but
## the assignment that would apply it is commented out; kept for provenance.
for(gene_name in rownames(jy_408)) {
mdn_gene_expr = median(normed[gene_name, normed[gene_name, ] > 0])
#print(gene_name)
#print(mdn_gene_expr)
#normed[gene_name, normed[gene_name, ] < mdn_gene_expr] = 0
}
## Write the thresholded matrix back, then find variable features.
jy_408 <- SetAssayData(jy_408, slot = 'data', normed)
jy_408 <- FindVariableFeatures(jy_408, selection.method = "vst")
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Center and scale every gene (not just variable features) ahead of PCA.
all.genes <- rownames(jy_408)
jy_408 <- ScaleData(jy_408, features = all.genes)
Centering and scaling data matrix
|
| | 0%
|
|==========================================================================================================| 100%
jy_408 <- RunPCA(jy_408, approx = FALSE)
Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.PC_ 1
Positive: VIP, ASCL1, CXCR4, RELN, SATB2, MAF1, KIA0319, EMX1, GAD1, CXCL12
PAX6, DLX2, SST, PROX1, LHX6, NCAM1
Negative: DCX, TBR1, LRP8, SCGN, COUPTF2, EGFR, CXCL14, TSHZ1, GSX2, EOMES
SP8, NKX2.1, CALB2, CXCR7, DCDC2, VLDLR
PC_ 2
Positive: TBR1, EOMES, KIA0319, DCDC2, LRP8, CALB2, DCX, SATB2, CXCL12, EGFR
EMX1, ASCL1, COUPTF2, PAX6, RELN, CXCR4
Negative: MAF1, TSHZ1, NKX2.1, SST, GAD1, DLX2, PROX1, SP8, GSX2, VIP
SCGN, LHX6, VLDLR, CXCL14, CXCR7, NCAM1
PC_ 3
Positive: COUPTF2, DLX2, PROX1, GAD1, CXCR4, LHX6, LRP8, TSHZ1, CXCL14, EOMES
NKX2.1, SP8, CALB2, CXCL12, DCDC2, VLDLR
Negative: SATB2, RELN, MAF1, PAX6, ASCL1, SCGN, SST, EMX1, GSX2, NCAM1
DCX, KIA0319, VIP, EGFR, CXCR7, TBR1
PC_ 4
Positive: NCAM1, TSHZ1, VLDLR, CXCL14, PROX1, EGFR, CXCR7, DCX, ASCL1, DCDC2
CALB2, PAX6, GSX2, SCGN, EOMES, KIA0319
Negative: LHX6, DLX2, LRP8, SP8, VIP, SST, COUPTF2, NKX2.1, TBR1, CXCR4
EMX1, RELN, CXCL12, MAF1, GAD1, SATB2
PC_ 5
Positive: GSX2, SP8, NKX2.1, SCGN, COUPTF2, CXCL14, EGFR, EMX1, TSHZ1, CALB2
CXCL12, TBR1, PAX6, SATB2, MAF1, KIA0319
Negative: SST, CXCR4, DCDC2, GAD1, PROX1, EOMES, DLX2, VIP, DCX, VLDLR
RELN, NCAM1, LHX6, ASCL1, CXCR7, LRP8
jy_408 <- FindNeighbors(jy_408, dims = 1:30)
Computing nearest neighbor graph
Computing SNN
jy_408 <- FindClusters(jy_408, resolution = 1.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 1013
Number of edges: 35954
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.5629
Number of communities: 12
Elapsed time: 0 seconds
jy_408 <- RunUMAP(jy_408, dims = 1:30)
15:46:39 UMAP embedding parameters a = 0.9922 b = 1.112
15:46:39 Read 1013 rows and found 30 numeric columns
15:46:39 Using Annoy for neighbor search, n_neighbors = 30
15:46:39 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:39 Writing NN index file to temp file /tmp/RtmpaBJ93n/file83475b9cef8d
15:46:39 Searching Annoy index using 1 thread, search_k = 3000
15:46:40 Annoy recall = 100%
15:46:40 Commencing smooth kNN distance calibration using 1 thread
15:46:41 Initializing from normalized Laplacian + noise
15:46:41 Commencing optimization for 500 epochs, with 38030 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:42 Optimization finished
DimPlot(jy_408, reduction = "umap", group.by = 'seurat_clusters') + NoAxes()
jy_408$clump = df_408$clump
DimPlot(jy_408, reduction = "umap", group.by = 'clump') + NoAxes() + NoLegend()
hcoords = df_408 %>% dplyr::select(c('X_horz', 'Y_horz')) %>% as.matrix()
colnames(hcoords) <- c('pixel_1', 'pixel_2')
jy_408[["H"]] <- CreateDimReducObject(embeddings = hcoords, key = "pixel_", assay = DefaultAssay(jy_408))
## Load every rethresholded csv for sample 164; sample 408 files are skipped.
df_164 <- data.frame()
for (file_name in list.files(data_dir)) {
  if (grepl('408', file_name)) {
    next
  }
  ## print after the skip so only 164 files are reported (consistent with
  ## the 408 loading loop above).
  print(file_name)
  df_to_append <- read.table(file.path(data_dir, file_name), sep = ',',
                             header = TRUE)
  ## Fill blank Image.Name cells downward from the row above.
  while (length(ind <- which(df_to_append$Image.Name == "")) > 0) {
    df_to_append$Image.Name[ind] <- df_to_append$Image.Name[ind - 1]
  }
  colnames(df_to_append) <- toupper(colnames(df_to_append))
  ## Tag rows with their source area = file name minus the extension
  ## (escaped, anchored pattern: the old strsplit '.csv' let '.' match
  ## any character).
  df_to_append <- df_to_append %>%
    mutate(area = sub('\\.csv$', '', file_name))
  ## Keep the column set consistent with what was accumulated so far.
  if (!is_empty(df_164)) {
    df_to_append <- df_to_append %>%
      dplyr::select(colnames(df_164))
  }
  df_164 <- rbind(df_164, df_to_append)
}
[1] "164_CC.csv"
[1] "164_MS_CC.csv"
[1] "164_MS_TC.csv"
[1] "164_TC.csv"
[1] "408_CC.csv"
[1] "408_dMS_TC.csv"
[1] "408_MS_CC.csv"
[1] "408_TC.csv"
[1] "408_vMS_TC.csv"
## Normalize the hand-entered image names for sample 164. gsub() is already
## vectorized, so the unlist(lapply(...)) wrappers are unnecessary; the
## replacement order is preserved exactly (it matters: 'L2' -> 'CC_L2' must
## run before the 'L2_' / '-L2' separator fixes).
df_164$IMAGE.NAME <- gsub('CC-', 'CC_', df_164$IMAGE.NAME)
df_164$IMAGE.NAME <- gsub('[*]', '', df_164$IMAGE.NAME)
df_164$IMAGE.NAME <- gsub('X', '', df_164$IMAGE.NAME)
df_164$IMAGE.NAME <- gsub('L2', 'CC_L2', df_164$IMAGE.NAME)
df_164$IMAGE.NAME <- gsub('L2_', 'L2-', df_164$IMAGE.NAME)
df_164$IMAGE.NAME <- gsub('-L2', '_L2', df_164$IMAGE.NAME)
## 'Cortical' images exist in both TC and CC areas; disambiguate by
## prefixing with the area the row came from (TC first, then CC — same
## order as the original so the masks see identical intermediate state).
tc_bad <- grepl('TC', df_164$area) & grepl('Cortical', df_164$IMAGE.NAME)
df_164$IMAGE.NAME[tc_bad] <- gsub('Cort', 'TC_Cort', df_164$IMAGE.NAME[tc_bad])
cc_bad <- grepl('CC', df_164$area) & grepl('Cortical', df_164$IMAGE.NAME)
df_164$IMAGE.NAME[cc_bad] <- gsub('Cort', 'CC_Cort', df_164$IMAGE.NAME[cc_bad])
unique(df_164$IMAGE.NAME)
[1] "CC_8" "CC_10" "CC_Cortical1" "CC_Cortical2" "CC_L2-1" "CC_L2-2" "CC_L2-3"
[8] "CC_2" "CC_3" "CC_4" "CC_5" "CC_6" "CC_7" "CC_9"
[15] "TC_1" "TC_2" "TC_3" "TC_4" "TC_5" "TC_6" "TC_7"
[22] "TC_8" "TC_9" "TC_10" "TC_Cortical1" "TC_Cortical2" "TC_Cortical3"
image_names <- unique(df_164$IMAGE.NAME)
# Preset these variables to sentinel values so unassigned rows are easy to spot
df_164$X <- -1
df_164$Y <- -1
df_164$clump <- NaN
clump_header <- '164_'
clump_files <- list.files(clump_dir)
# Normalization constants ----
## Pixel size of each source image (coordinates measured from the top left).
IMAGE_SIZE <- 1024
## This is the size of an image in the global coordinate space
IMAGE_LEN <- 32
## Dimensions of the stitched TC / CC overview images, global units.
TC_IMAGE_HEIGHT <- 410
TC_IMAGE_WIDTH <- 446
CC_IMAGE_HEIGHT <- 422
CC_IMAGE_WIDTH <- 214
# Load the dataframe holding global and relative image coordinates
img_cords <- read.table(file.path(meta_dir, '164_pixel_coordinates.csv'),
                        sep = ',', header = TRUE)
images <- list.files(meta_dir)
## For each 164 image: attach clump labels and global (X, Y) coordinates to
## the matching rows of df_164.
for(image_name in image_names){
if(grepl('408', image_name)){
next
}
print(image_name)
## "CC_4" -> cortex "CC", number "4"; used to find the overlay csv.
split_names = strsplit(image_name, '_')
cortex = toupper(split_names[[1]][1])
number = split_names[[1]][2]
number_csv = paste0('_', number, '.csv')
filename = images[grepl(cortex, images) & grepl(number_csv, images) & grepl('164', images)]
coordinates = read.table(file.path(meta_dir, filename), sep = ',', header = TRUE)
## Get the clumps
filename = clump_files[grepl(clump_header, clump_files) & grepl(paste0(image_name, '_'),clump_files)]
## NOTE(review): `length(filename != 0)` reads oddly but equals
## length(filename), so the branch is taken iff a clump file was found;
## clearer would be `length(filename) != 0`.
if(length(filename != 0)){
## Clump csv is transposed on disk: after t(), rows are (roi, cluster).
clump_df = as.data.frame(t(read.csv(file.path(clump_dir, filename), header = FALSE)))
colnames(clump_df) = c('roi', 'cluster')
clump_df$roi = as.numeric(clump_df$roi)
## Special case: ROI 38 of CC_L2-1 was dropped from the data, so remove
## its coordinate row and shift subsequent ROI numbers down by one.
if(image_name == "CC_L2-1"){
coordinates = coordinates[c(1:37, 39:nrow(coordinates)), ]
if(38 %in% clump_df$roi){clump_df = clump_df[clump_df$roi != 38, ]}
clump_df$roi[clump_df$roi > 37] = clump_df$roi[clump_df$roi > 37] - 1
}
## Map 1-based ROI numbers to df_164 row indices by offsetting from the
## image's first row (assumes contiguous rows per image, in ROI order).
image_idxs = which(df_164$IMAGE.NAME == image_name)
clump_df$roi_idxs = image_idxs[1] + clump_df$roi - 1
df_164[clump_df$roi_idxs, "clump"] = paste0(clump_header, image_name, '_', clump_df$cluster)
} else{
print('No clumps!')
print(image_name)
}
if(cortex == 'CC'){
print(paste('cc', filename, image_name))
## So if CC, we add the coordinates for TC_1 to overall image coordinates
x_adj = img_cords[img_cords$Name == 'TC_1', 'x'] +
img_cords[img_cords$Name == 'G_CC1_to_TC1', 'x']
## Start from bottom, add the height, subtract TC_1 height, and then global CC1 to TC1
y_adj = TC_IMAGE_HEIGHT - img_cords[img_cords$Name == 'TC_1', 'y'] +
img_cords[img_cords$Name == 'G_CC1_to_TC1', 'y'] + CC_IMAGE_HEIGHT
}else{
print(paste('tc', filename, image_name))
x_adj = 0
y_adj = TC_IMAGE_HEIGHT
}
## So don't do repelled for now
#x_repelled <- (512 - coordinates$X_Coordinate_In_pixels)
## so the resized x distance is from left, so just add to the box location and adj
df_164[df_164$IMAGE.NAME == image_name, 'X'] = (coordinates$X_Coordinate_In_pixels /
IMAGE_SIZE * IMAGE_LEN) +
img_cords[img_cords$Name == image_name, 'x'] + x_adj
## resized y distance
df_164[df_164$IMAGE.NAME == image_name, 'Y'] = y_adj - img_cords[img_cords$Name == image_name, 'y'] -
(coordinates$Y_Coordinate_In_pixels / IMAGE_SIZE *
IMAGE_LEN)
}
[1] "CC_8"
[1] "cc 164_CC_ROI_CC_8_clumps.csv CC_8"
[1] "CC_10"
[1] "cc 164_CC_ROI_CC_10_clumps.csv CC_10"
[1] "CC_Cortical1"
[1] "cc 164_CC_ROI_CC_Cortical1_clumps.csv CC_Cortical1"
[1] "CC_Cortical2"
[1] "cc 164_CC_ROI_CC_Cortical2_clumps.csv CC_Cortical2"
[1] "CC_L2-1"
[1] "cc 164_CC_ROI_CC_L2-1_clumps.csv CC_L2-1"
[1] "CC_L2-2"
[1] "cc 164_CC_ROI_CC_L2-2_clumps.csv CC_L2-2"
[1] "CC_L2-3"
[1] "cc 164_CC_ROI_CC_L2-3_clumps.csv CC_L2-3"
[1] "CC_2"
[1] "No clumps!"
[1] "CC_2"
[1] "cc CC_2"
[1] "CC_3"
[1] "cc 164_CC_ROI_CC_3_clumps.csv CC_3"
[1] "CC_4"
[1] "cc 164_CC_ROI_CC_4_clumps.csv CC_4"
[1] "CC_5"
[1] "cc 164_CC_ROI_CC_5_clumps.csv CC_5"
[1] "CC_6"
[1] "cc 164_CC_ROI_CC_6_clumps.csv CC_6"
[1] "CC_7"
[1] "cc 164_CC_ROI_CC_7_clumps.csv CC_7"
[1] "CC_9"
[1] "cc 164_CC_ROI_CC_9_clumps.csv CC_9"
[1] "TC_1"
[1] "tc 164_TC_ROI_TC_1_clumps.csv TC_1"
[1] "TC_2"
[1] "tc 164_TC_ROI_TC_2_clumps.csv TC_2"
[1] "TC_3"
[1] "tc 164_TC_ROI_TC_3_clumps.csv TC_3"
[1] "TC_4"
[1] "tc 164_TC_ROI_TC_4_clumps.csv TC_4"
[1] "TC_5"
[1] "tc 164_TC_ROI_TC_5_clumps.csv TC_5"
[1] "TC_6"
[1] "tc 164_TC_ROI_TC_6_clumps.csv TC_6"
[1] "TC_7"
[1] "tc 164_TC_ROI_TC_7_clumps.csv TC_7"
[1] "TC_8"
[1] "tc 164_TC_ROI_TC_8_clumps.csv TC_8"
[1] "TC_9"
[1] "tc 164_TC_ROI_TC_9_clumps.csv TC_9"
[1] "TC_10"
[1] "tc 164_TC_ROI_TC_10_clumps.csv TC_10"
[1] "TC_Cortical1"
[1] "tc 164_TC_ROI_TC_Cortical1_clumps.csv TC_Cortical1"
[1] "TC_Cortical2"
[1] "tc 164_TC_ROI_TC_Cortical2_clumps.csv TC_Cortical2"
[1] "TC_Cortical3"
[1] "tc 164_TC_ROI_TC_Cortical3_clumps.csv TC_Cortical3"
## Build the Seurat object for sample 164: drop metadata columns, transpose
## so features are rows, then log-normalize (mirrors the 408 pipeline).
rownames(df_164) <- seq_len(nrow(df_164))  # seq_len() is the safe 1:nrow()
jy_164 <- df_164 %>%
  dplyr::select(-c(area, IMAGE.NAME, X, Y, clump)) %>%
  t() %>%
  CreateSeuratObject()
jy_164 <- NormalizeData(jy_164, scale.factor = 1e5) ###
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Hard floor: zero out normalized expression below 3 (treated as noise).
normed = GetAssayData(jy_164, slot = 'data')
normed[normed < 3] = 0
## NOTE(review): this loop is currently a no-op — the median is computed but
## the assignment that would apply it is commented out; kept for provenance.
for(gene_name in rownames(jy_164)) {
mdn_gene_expr = median(normed[gene_name, normed[gene_name, ] > 0])
#print(gene_name)
#print(mdn_gene_expr)
#normed[gene_name, normed[gene_name, ] < mdn_gene_expr] = 0
}
## Write the thresholded matrix back, then find variable features.
jy_164 <- SetAssayData(jy_164, slot = 'data', normed)
jy_164 <- FindVariableFeatures(jy_164, selection.method = "vst")
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Center and scale every gene (not just variable features) ahead of PCA.
all.genes <- rownames(jy_164)
jy_164 <- ScaleData(jy_164, features = all.genes)
Centering and scaling data matrix
|
| | 0%
|
|==========================================================================================================| 100%
jy_164 <- RunPCA(jy_164, approx = FALSE)
Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.PC_ 1
Positive: NKX2.1, MAF1, SCGN, PROX1, GAD1, SP8, VIP, LHX6, GSX2, DCX
TSHZ1, CXCR7, COUPTF2, SST, DLX2, LRP8
Negative: KIA0319, SATB2, ASCL1, PAX6, CALB2, DCDC2, RELN, NCAM1, CXCL12, TBR1
EMX1, EGFR, CXCL14, VLDLR, CXCR4, EOMES
PC_ 2
Positive: DCX, SCGN, SP8, TBR1, EGFR, SATB2, DCDC2, EOMES, NKX2.1, LRP8
KIA0319, NCAM1, EMX1, CXCR7, TSHZ1, CALB2
Negative: GAD1, VIP, CXCR4, LHX6, MAF1, DLX2, SST, RELN, CXCL12, CXCL14
COUPTF2, PROX1, PAX6, GSX2, ASCL1, VLDLR
PC_ 3
Positive: COUPTF2, SP8, PROX1, SCGN, EOMES, TBR1, MAF1, CXCR7, NKX2.1, GSX2
TSHZ1, LHX6, DCDC2, EGFR, VIP, LRP8
Negative: SST, ASCL1, PAX6, RELN, CALB2, CXCL14, EMX1, CXCL12, DLX2, NCAM1
GAD1, CXCR4, KIA0319, DCX, VLDLR, SATB2
PC_ 4
Positive: TSHZ1, PROX1, SP8, ASCL1, DLX2, VLDLR, NKX2.1, NCAM1, PAX6, CXCR7
SCGN, EMX1, RELN, CXCL14, VIP, KIA0319
Negative: LRP8, TBR1, EOMES, CALB2, LHX6, GSX2, COUPTF2, DCX, MAF1, GAD1
CXCL12, DCDC2, SATB2, EGFR, CXCR4, SST
PC_ 5
Positive: PAX6, COUPTF2, RELN, TSHZ1, DLX2, SP8, CXCL12, SCGN, TBR1, GSX2
LRP8, DCX, SATB2, LHX6, ASCL1, GAD1
Negative: NKX2.1, DCDC2, EOMES, VLDLR, NCAM1, SST, CXCR7, EMX1, CXCR4, EGFR
MAF1, KIA0319, VIP, PROX1, CXCL14, CALB2
jy_164 <- FindNeighbors(jy_164, dims = 1:30)
Computing nearest neighbor graph
Computing SNN
jy_164 <- FindClusters(jy_164, resolution = 0.8)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 802
Number of edges: 30100
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.6519
Number of communities: 6
Elapsed time: 0 seconds
jy_164 <- RunUMAP(jy_164, dims = 1:30)
15:46:44 UMAP embedding parameters a = 0.9922 b = 1.112
15:46:44 Read 802 rows and found 30 numeric columns
15:46:44 Using Annoy for neighbor search, n_neighbors = 30
15:46:44 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:44 Writing NN index file to temp file /tmp/RtmpaBJ93n/file834751395e2c
15:46:44 Searching Annoy index using 1 thread, search_k = 3000
15:46:45 Annoy recall = 100%
15:46:45 Commencing smooth kNN distance calibration using 1 thread
15:46:46 Initializing from normalized Laplacian + noise
15:46:46 Commencing optimization for 500 epochs, with 30056 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:47 Optimization finished
## Carry the clump labels onto the Seurat object and inspect the UMAP by
## cluster and by clump (row order of df_164 matches cell order in jy_164).
jy_164$clump <- df_164$clump
DimPlot(jy_164, reduction = "umap", group.by = 'seurat_clusters') + NoAxes()
DimPlot(jy_164, reduction = "umap", group.by = 'clump') + NoAxes() + NoLegend()
unique(df_164$IMAGE.NAME)
[1] "CC_8" "CC_10" "CC_Cortical1" "CC_Cortical2" "CC_L2-1" "CC_L2-2" "CC_L2-3"
[8] "CC_2" "CC_3" "CC_4" "CC_5" "CC_6" "CC_7" "CC_9"
[15] "TC_1" "TC_2" "TC_3" "TC_4" "TC_5" "TC_6" "TC_7"
[22] "TC_8" "TC_9" "TC_10" "TC_Cortical1" "TC_Cortical2" "TC_Cortical3"
## Anatomical ordering of the 164 images for the horizontal-strip layout:
## each image occupies a 35-unit slot along one axis, zero on the other.
images_ordered <- c('TC_Cortical3', 'TC_Cortical2', 'TC_Cortical1', 'TC_10', 'TC_9', 'TC_8', 'TC_7', 'TC_6', 'TC_5', 'TC_4', 'TC_3', 'TC_2','TC_1','CC_2','CC_3',
                    'CC_4', 'CC_5', 'CC_6', 'CC_7', 'CC_8', 'CC_9', 'CC_10',
                    'CC_L2-1', 'CC_L2-2', 'CC_L2-3', 'CC_Cortical1', 'CC_Cortical2')
x_horz <- seq_along(images_ordered) * 35  # seq_along() is the safe 1:length()
y_horz <- rep(0, length(images_ordered))
horz_embedding <- data.frame()
## Sentinel values: -1 flags rows the loop below never assigns.
df_164$X_horz <- -1
df_164$Y_horz <- -1
images <- list.files(meta_dir)
## Attach horizontal-strip coordinates to df_164 (mirrors the 408 loop).
for(i in 1:length(images_ordered)){
image_name = images_ordered[i]
print(image_name)
## "CC_4" -> cortex "CC", number "4"; used to find the overlay csv.
split_names = strsplit(image_name, '_')
cortex = toupper(split_names[[1]][1])
number = split_names[[1]][2]
number_csv = paste0('_', number, '.csv')
filename = images[grepl(cortex, images) & grepl(number_csv, images) & grepl('164', images)]
coordinates = read.table(file.path(meta_dir, filename), sep = ',', header = TRUE)
## ROI 38 of CC_L2-1 was dropped from the data; drop its coordinate row too
## so positions stay aligned with df_164 rows.
if(image_name == "CC_L2-1"){
coordinates = coordinates[c(1:37, 39:nrow(coordinates)), ]
}
## checked already that lists are equal, missing 1, 18, 19 for now, layer 1 and others
## so this is a little tricky, so need to get it right
## Remember, it is the top right that the coordinate is coming from, but
## the bottom right is the new coordinate space.
## so first when we get the original coordinate space, to set to relative
## of bottom would be the same X, but 1024 - Y
## push out the coordinates for better visualization
#x_repelled <- (512 - coordinates$X_Coordinate_In_pixels)
## Rescale pixel coordinates to IMAGE_LEN units and offset by the image's
## slot; note x_horz/y_horz are deliberately swapped (strip runs along Y).
df_164[df_164$IMAGE.NAME == image_name, 'X_horz'] = (coordinates$X_Coordinate_In_pixels /
IMAGE_SIZE * IMAGE_LEN) + y_horz[i]
df_164[df_164$IMAGE.NAME == image_name, 'Y_horz'] = ((1024-coordinates$Y_Coordinate_In_pixels) /
IMAGE_SIZE * IMAGE_LEN) + x_horz[i]
}
[1] "TC_Cortical3"
[1] "TC_Cortical2"
[1] "TC_Cortical1"
[1] "TC_10"
[1] "TC_9"
[1] "TC_8"
[1] "TC_7"
[1] "TC_6"
[1] "TC_5"
[1] "TC_4"
[1] "TC_3"
[1] "TC_2"
[1] "TC_1"
[1] "CC_2"
[1] "CC_3"
[1] "CC_4"
[1] "CC_5"
[1] "CC_6"
[1] "CC_7"
[1] "CC_8"
[1] "CC_9"
[1] "CC_10"
[1] "CC_L2-1"
[1] "CC_L2-2"
[1] "CC_L2-3"
[1] "CC_Cortical1"
[1] "CC_Cortical2"
## Register the 164 horizontal-strip layout as custom reduction 'H'.
hcoords = df_164 %>% dplyr::select(c('X_horz', 'Y_horz')) %>% as.matrix()
colnames(hcoords) <- c('pixel_1', 'pixel_2')
jy_164[["H"]] <- CreateDimReducObject(embeddings = hcoords, key = "pixel_", assay = DefaultAssay(jy_164))
## Prefix cell names with the sample id so the merge below cannot collide.
jy_164<- RenameCells(jy_164, c(outer('164_', 1:ncol(jy_164), FUN=paste0)))
jy_164$area = df_164$area
jy_408<- RenameCells(jy_408, c(outer('408_', 1:ncol(jy_408), FUN=paste0)))
jy_408$area = df_408$area
## Combine both samples into a single object for joint analysis.
jy_all <- merge(jy_164, jy_408)
jy_all <- NormalizeData(jy_all, scale.factor = 1e5) ###
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
## Hard floor: zero out normalized expression below 3 (treated as noise).
normed = GetAssayData(jy_all, slot = 'data')
normed[normed < 3] = 0
## Per-gene binarization threshold for the merged object:
##  - DCX: fixed low threshold of 0.5. NOTE(review): the printed message
##    says 'skip dcx' but DCX is NOT skipped — it only bypasses the median
##    computation; consider rewording the message.
##  - COUPTF2 / SP8: 40th percentile of positive cells.
##  - all other genes: median of positive cells.
for(gene_name in rownames(jy_all)) {
if (gene_name == 'DCX'){
mdn_gene_expr = 0.5
print('skip dcx')
} else if (!gene_name %in% c('COUPTF2', 'SP8')){
mdn_gene_expr = median(normed[gene_name, normed[gene_name, ] > 0])
}else{
mdn_gene_expr = quantile(normed[gene_name, normed[gene_name, ] > 0], .40)
}
## Zero out sub-threshold cells for this gene.
normed[gene_name, normed[gene_name, ] < mdn_gene_expr] = 0
}
[1] "skip dcx"
jy_all <- SetAssayData(jy_all, slot = 'data', normed)
jy_all <- FindVariableFeatures(jy_all, selection.method = "vst")
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
all.genes <- rownames(jy_all)
jy_all <- ScaleData(jy_all, features = all.genes)
Centering and scaling data matrix
|
| | 0%
|
|==========================================================================================================| 100%
jy_all <- RunPCA(jy_all, approx = FALSE)
Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.Warning: Requested number is larger than the number of available items (32). Setting to 32.PC_ 1
Positive: DCX, TBR1, KIA0319, LRP8, SATB2, EOMES, DCDC2, EGFR, NCAM1, ASCL1
CALB2, VLDLR, PAX6, EMX1, COUPTF2, CXCR7
Negative: GAD1, VIP, LHX6, SST, MAF1, CXCR4, NKX2.1, DLX2, PROX1, TSHZ1
GSX2, RELN, SP8, CXCL14, CXCL12, SCGN
PC_ 2
Positive: SCGN, TSHZ1, DCX, NKX2.1, SP8, CXCR7, PROX1, GSX2, COUPTF2, TBR1
LRP8, VLDLR, CXCL14, EGFR, NCAM1, MAF1
Negative: ASCL1, CXCR4, KIA0319, SATB2, CALB2, CXCL12, VIP, RELN, PAX6, DLX2
GAD1, DCDC2, SST, EMX1, EOMES, LHX6
PC_ 3
Positive: NCAM1, TSHZ1, MAF1, VLDLR, PROX1, CXCR7, CXCL14, RELN, GSX2, ASCL1
PAX6, EGFR, CXCR4, VIP, DCDC2, NKX2.1
Negative: CALB2, TBR1, LRP8, CXCL12, DCX, DLX2, COUPTF2, SCGN, SST, EOMES
LHX6, SP8, SATB2, GAD1, KIA0319, EMX1
PC_ 4
Positive: TBR1, LRP8, GAD1, EGFR, LHX6, VIP, CXCR4, CXCL14, NCAM1, DCDC2
DCX, EOMES, SST, CXCR7, MAF1, RELN
Negative: SP8, DLX2, PAX6, SCGN, CALB2, NKX2.1, EMX1, ASCL1, SATB2, COUPTF2
KIA0319, CXCL12, PROX1, GSX2, VLDLR, TSHZ1
PC_ 5
Positive: RELN, GSX2, PAX6, MAF1, SST, SCGN, LHX6, TBR1, EGFR, LRP8
ASCL1, SP8, EMX1, SATB2, VIP, DCX
Negative: PROX1, CALB2, VLDLR, TSHZ1, EOMES, DLX2, DCDC2, CXCR4, CXCL12, GAD1
CXCR7, NCAM1, CXCL14, COUPTF2, NKX2.1, KIA0319
jy_all <- FindNeighbors(jy_all, dims = 1:30)
Computing nearest neighbor graph
Computing SNN
jy_all <- FindClusters(jy_all, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 1815
Number of edges: 61161
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8215
Number of communities: 7
Elapsed time: 0 seconds
jy_all <- RunUMAP(jy_all, dims = 1:30)
15:46:50 UMAP embedding parameters a = 0.9922 b = 1.112
15:46:50 Read 1815 rows and found 30 numeric columns
15:46:50 Using Annoy for neighbor search, n_neighbors = 30
15:46:50 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:50 Writing NN index file to temp file /tmp/RtmpaBJ93n/file8347566dc315
15:46:50 Searching Annoy index using 1 thread, search_k = 3000
15:46:50 Annoy recall = 100%
15:46:51 Commencing smooth kNN distance calibration using 1 thread
15:46:52 Initializing from normalized Laplacian + noise
15:46:52 Commencing optimization for 500 epochs, with 70670 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:46:54 Optimization finished
DimPlot(jy_all, reduction = "umap", group.by = 'seurat_clusters') + NoAxes()
DimPlot(jy_all, reduction = "umap", group.by = 'area') + NoAxes() # NoLegend()
FeaturePlot(jy_all, c('CXCL12', 'CXCR4'), cells = which(FetchData(jy_all, 'GAD1') > 0), cols = c( '#F18F01', '#048BA8'), blend= TRUE)
#> Warning: Only two colors provided, assuming specified are for features and agumenting with 'lightgrey' for double-negatives
## Human-readable labels for the joint clusters, in cluster-level order.
new.cluster.ids <- c('CGE/LGE', 'Ex', 'TBR1+ CGE', 'CALB2+DLX2+',
                     'VIP+GAD1+', 'SST+LHX6+', 'MGE')
names(new.cluster.ids) <- levels(jy_all)
jy_all <- RenameIdents(jy_all, new.cluster.ids)
## Pie chart of the cell-type composition of one clump.
##
## sobj:     Seurat object with Idents() set to cell-type labels and a
##           'clump' metadata column.
## clump_id: clump label to plot (e.g. '164_CC_8_0').
## Returns a ggplot object (no legend; title shows the clump and cell count).
plot_cluster_fraction <- function(sobj, clump_id){
  all_idents <- levels(Idents(sobj))
  ## Fix each identity's color globally so pies are comparable across clumps.
  def_colors <- hue_pal()(length(all_idents))
  clump_obj <- sobj[, sobj$clump == clump_id]
  ## Build the per-cell table directly; the original cbind/as.data.frame
  ## round-trip only coerced everything to character anyway.
  clmp_df <- data.frame(cluster = as.character(Idents(clump_obj)),
                        clump = clump_obj$clump)
  cell_count <- nrow(clmp_df)
  ## Subset the palette to the identities actually present in this clump.
  which_colors <- def_colors[all_idents %in% clmp_df$cluster]
  clmp_df %>% dplyr::count(clump, cluster) %>%
    ggplot(aes(x = "", y = n, fill = cluster)) +
    geom_bar(stat = "identity", width = 1, color = "white") +
    coord_polar("y", start = 0) +
    ggtitle(paste0(clump_id, ', n=', cell_count)) +
    scale_fill_manual(values = which_colors) +
    theme_void() + NoLegend() +
    theme(title = element_text(face = 'bold', size = rel(0.8), hjust = 1)) # remove background, grid, numeric labels
}
## Spot-check one clump, then draw a pie for every clump in a grid.
plot_cluster_fraction(jy_all, '164_CC_8_0')
clmps <- unique(jy_all$clump)
## lapply directly over the clump labels instead of 1:length() indexing.
plots <- lapply(clmps, function(cl) plot_cluster_fraction(jy_all, cl))
umaps <- plot_grid(plotlist = plots, label_size = 3, nrow = 23)
umaps
## Per-cell summary table: identity, clump label, tissue area (all character).
aClumpDF <- as.data.frame(cbind(as.character(Idents(jy_all)), jy_all$clump, jy_all$area))
colnames(aClumpDF) <- c('ctype', 'clump', 'area')
## 'clump' is a character column: unassigned cells carry the literal string
## "NaN" (numeric NaN was coerced when string labels were assigned). Compare
## against "NaN" explicitly instead of relying on NaN -> "NaN" coercion.
## Stacked composition per clump.
g1 <- aClumpDF %>% filter(clump != "NaN") %>%
  dplyr::count(clump, ctype) %>%
  ggplot(aes(clump, n, fill = ctype)) +
  geom_bar(stat = "identity")
## Identity counts among unclumped cells.
g2 <- aClumpDF %>% filter(clump == "NaN") %>%
  dplyr::count(clump, ctype) %>%
  ggplot(aes(ctype, y = n, fill = ctype)) +
  geom_bar(stat = "identity") + RotatedAxis() + NoLegend()
g2
## Identity counts among clumped cells.
aClumpDF %>% filter(clump != "NaN") %>%
  dplyr::count(clump, ctype) %>%
  ggplot(aes(ctype, y = n, fill = ctype)) +
  geom_bar(stat = "identity") + RotatedAxis() + NoLegend()
## I want to summarize the ratio of nan to non-nan
aClumpDF %>% filter(clump != "NaN") %>%
  dplyr::count(ctype, area, clump) %>%
  ggplot(aes(area, n, fill = ctype)) + RotatedAxis() +
  geom_bar(stat = "identity")
## Distribution of clump sizes per area.
aClumpDF %>% filter(clump != "NaN") %>%
  dplyr::count(area, clump) %>%
  ggplot(aes(area, n)) +
  geom_boxplot() + RotatedAxis()
## Most frequent value of x; ties are broken by first appearance in unique(x).
Mode <- function(x) {
  distinct_vals <- unique(x)
  ## tabulate(match(...)) counts occurrences of each distinct value in order.
  freq <- tabulate(match(x, distinct_vals))
  distinct_vals[which.max(freq)]
}
## Dominant cell type per clump: the dplyr pipeline below prints but does
## not store its result; the ave() call actually writes the column.
aClumpDF %>% group_by(clump) %>% mutate(top_ctype=Mode(ctype))
aClumpDF$top_ctype <- with(aClumpDF, ave(ctype, clump, FUN=Mode))
aClumpDF$top_ctype
# Create data: one bubble per (ctype, clump) pair, sized by cell count.
#clusters <- aClumpDF %>% filter(grepl('MS', area)) %>% dplyr::count(area, clump) %>% filter(clump != NaN)
## Compare against the literal "NaN" string (clump is a character column).
clusters <- aClumpDF %>% dplyr::count(ctype, clump) %>% filter(clump != "NaN")
# Generate the layout. This function returns a dataframe with one line per
# bubble: its center (x, y) and its radius, proportional to the value.
packing <- circleProgressiveLayout(clusters$n, sizetype = 'area')
# Attach the packing information to the initial data frame.
clusters <- cbind(clusters, packing)
# Check that radius is proportional to value. We don't want a linear relationship, since it is the AREA that must be proportionnal to the value
# plot(data$radius, data$value)
# Convert each center + radius into polygon vertices for ggplot.
clusters.gg <- circleLayoutVertices(packing, npoints = 50)
## Each circle contributes npoints + 1 = 51 vertices (the polygon closes).
clusters.gg$idents <- rep(clusters$ctype, each = 51)
# Make the plot
ggplot() +
  # Bubbles, filled by cell type.
  geom_polygon(data = clusters.gg, aes(x, y, group = id, fill = as.factor(idents)), colour = "black", alpha = 0.9) +
  # Clump label in the center of each bubble, sized by count.
  geom_text(data = clusters, aes(x, y, size = n, label = clump)) +
  scale_size_continuous(range = c(1, 3)) +
  # General theme:
  theme_void() +
  theme(legend.position = "none") +
  coord_equal()
## Per-cell neighbor composition: for each clumped cell, the fraction of its
## clump-mates (excluding the cell itself) assigned to each seurat cluster.
jy_clump = jy_all[, jy_all$clump != "NaN"]
n_clusters <- length(levels(jy_all))
# Preallocate one slot per cell instead of growing clump_meta with rbind()
prob_rows <- vector("list", ncol(jy_clump))
tic()
for (i in seq_len(ncol(jy_clump))) {
  # Current cell barcode and all cells sharing its clump id
  cell <- colnames(jy_clump[, i])
  clump <- jy_clump[, jy_clump$clump == jy_clump$clump[i]]
  # Exclude the cell itself from its neighbor set. NOTE(review): the original
  # `filter(clump != cell)` compared the clump-id COLUMN to the cell barcode,
  # which never matches, so every cell counted itself among its neighbors —
  # contradicting the "get the neighbors for that particular cell" intent.
  neighbors <- clump@meta.data[rownames(clump@meta.data) != cell, ]
  probs <- rep(0, n_clusters)
  neighbor_ids <- as.numeric(neighbors$seurat_clusters)
  # Fraction of neighbors in each cluster that actually occurs
  for (j_class in unique(neighbor_ids)) {
    probs[j_class] <- sum(neighbor_ids == j_class) / nrow(neighbors)
  }
  prob_rows[[i]] <- probs
}
toc()
# Assemble once at the end; name columns p1..pK once instead of every pass
clump_meta <- as.data.frame(do.call(rbind, prob_rows))
colnames(clump_meta) <- paste0('p', seq_len(n_clusters))
clump_meta
## One boxplot panel per cluster: distribution of neighbor-composition
## probabilities among cells of that cluster.
clumps <- as.data.frame(cbind(jy_clump@meta.data, clump_meta))
# Derive the probability column names from the identity levels instead of
# hard-coding p1..p13, so the panel adapts if the cluster count changes.
prob_cols <- paste0('p', seq_along(levels(jy_all)))
plots <- list()
# levels(unique(f)) == levels(f) for a factor; iterate the levels directly
for (ref_cluster in seq_along(levels(clumps$seurat_clusters))) {
  plots[[ref_cluster]] <- clumps %>%
    filter(as.numeric(seurat_clusters) == ref_cluster) %>%
    dplyr::select(all_of(c('seurat_clusters', prob_cols))) %>%
    pivot_longer(!seurat_clusters, names_to = "classes", values_to = "prob") %>%
    ggplot(aes(x = prob, fill = factor(classes, levels = prob_cols))) + geom_boxplot(notch = FALSE) +
    ggtitle(paste(levels(jy_all)[ref_cluster], "vs other")) + scale_fill_discrete(name = 'subtypes', labels=levels(Idents(jy_all)))
}
plot_grouped <- marrangeGrob(plots, nrow=2, ncol=2)
ggsave(filename = 'cell_specific_clustering.pdf', path = file.path(output_dir_plot, '20220808_1'), plot_grouped, height = 10, width = 14, dpi = 150)
# NOTE(review): stale scratch block — `clumps` has no `cluster`, `image`, or
# `p0` columns (its columns are `seurat_clusters` and p1..p13), so this pipe
# errors as written. Kept as transcript history; appears superseded by the
# per-cluster loop above.
ref_cluster = 2
clumps %>%
filter(cluster == ref_cluster) %>%
select(c(image, p0, p1, p2, p3, p4, p5, p6)) %>%
pivot_longer(!cluster, names_to = "classes", values_to = "prob") %>%
ggplot(aes(x = prob, color = classes)) + geom_boxplot() +
ggtitle(paste("Cluster",as.character(ref_cluster-1), "vs other"))
## Mean neighbor-composition probability per cluster, shown as a heatmap.
# turbo() and rocket() come from viridis; load it BEFORE the first pheatmap
# call (the original loaded it one line after using turbo(), which errors in
# a fresh session).
library("viridis")
pmat <- clumps %>%
  group_by(seurat_clusters) %>%
  dplyr::select(c(p1, p2, p3, p4, p5, p6,p7,p8,p9,p10,p11,p12,p13)) %>%
  dplyr::summarise(across(everything(), list(mean))) %>%
  dplyr::select(-seurat_clusters) %>%
  as.matrix()
# Rows and columns both follow cluster-identity order
rownames(pmat) <- levels(Idents(jy_all))
colnames(pmat) <- rownames(pmat)
pheatmap(pmat, color = turbo(n = 50))
pheatmap(pmat,color = rocket(n = 50), cluster_rows = FALSE, cluster_cols = FALSE)
## Goal: expression of functional genes by area and by clump membership —
## the four regional panels, clump vs no clump, and per-clump breakdowns.
## Start with the boxplot panels.
plot_functional_genes <- function(sobj, group_key, group = 'area', fxn_gene_list = NULL) {
  ## Boxplots of per-cell expression for a panel of functional genes,
  ## comparing clumped vs unclumped cells within one group (area or image).
  ## A NULL fxn_gene_list falls back to the default hard-coded panel.
  if (is.null(fxn_gene_list)) {
    fxn_gene_list <- c('VLDLR', 'LRP8', 'CXCR4', 'CXCR7', 'CXCL12', 'CXCL14', 'NCAM1')
  }
  grp_cells <- sobj[[group]] == group_key
  grp_obj <- sobj[, grp_cells]
  expr_tbl <- as.data.frame(FetchData(grp_obj, fxn_gene_list))
  ## "Clumpedness": grouped as clump vs no clump (unclumped cells carry the
  ## literal string "NaN" in the clump metadata)
  expr_tbl$clump <- grp_obj$clump != "NaN"
  expr_tbl %>%
    pivot_longer(!clump, names_to = 'gene', values_to = 'expr') %>%
    ggplot(aes(x = gene, y = expr, fill = clump)) +
    geom_boxplot() + theme_classic() + ggtitle(group_key) + RotatedAxis()
}
plot_functional_genes(jy_all, '164_TC')
## One clump-vs-no-clump expression panel per area
area_list <- unique(jy_all$area)
# Iterate the areas directly; the prior `plots = list()` was immediately
# overwritten by lapply() and is dropped.
plots <- lapply(area_list, function(a) {
  plot_functional_genes(jy_all, a)
})
vlnplots <- plot_grid(plotlist = plots, label_size = 3, nrow = 9)
vlnplots
#ggsave(filename = 'img_fxnal_gene_clumps.pdf', path = file.path(output_dir_plot, '20220817_1'), vlnplots, height = 9, width = 12, dpi = 150)
## ... and one panel per image. NOTE(review): assumes df_164/df_408 rows are
## ordered 1:1 with jy_all columns — TODO confirm against the object build.
jy_all$image <- c(df_164$IMAGE.NAME, df_408$IMAGE.NAME)
image_list <- unique(jy_all$image)
plots <- lapply(image_list, function(img) {
  plot_functional_genes(jy_all, img, group = 'image')
})
bxplots <- plot_grid(plotlist = plots, label_size = 3, nrow = 12)
bxplots
ggsave(filename = 'img_fxnal_gene_clumps.pdf', path = file.path(output_dir_plot, '20220817_1'), bxplots, height = 18, width = 12, dpi = 150)
# Spatial scatter of one image's cells, colored by expression of a single
# gene, with clumped cells encircled per clump.
#
# sobj:           Seurat object; NOTE(review): the subset uses the GLOBAL
#                 df_408, not sobj metadata, so this silently assumes
#                 sobj == jy_408 with columns aligned 1:1 to df_408 rows —
#                 TODO confirm.
# clump_name:     image name matched against df_408$IMAGE.NAME.
# gene:           gene driving the color gradient.
# pt_size:        point size for cells.
# IMAGE_SPECIFIC: TRUE titles the panel with the image name, FALSE with gene.
plot_clump_expr <- function(sobj, clump_name, gene, pt_size = 8, IMAGE_SPECIFIC = FALSE) {
sbset_obj = sobj[, df_408$IMAGE.NAME == clump_name]
# 'H' reduction holds the per-image spatial coordinates
xy = Embeddings(sbset_obj, reduction = 'H')
#expmat = as.character(Idents(sbset_obj))
expmat = FetchData(sbset_obj, gene)
df <- as.data.frame(cbind(xy, expmat))
colnames(df) <- c('x', 'y', 'ident')
# Functional gene categories; each maps to a fixed gradient high-end color
ligands = c('CXCL12', 'CXCL14', 'RELN')
receptors = c('CXCR4', 'CXCR7', 'LRP8', 'VLDLR', 'EGFR')
maturity = c('VIP', 'SST')
adhesion = c('NCAM1')
immaturity = c('DCX')
# NOTE(review): 'KIA0319' looks like a typo for the dyslexia gene 'KIAA0319';
# as written it would never match a rowname — confirm against rownames(sobj).
disease = c('DCDC2', 'KIA0319')
color_df = as.data.frame(rownames(sobj))
colnames(color_df) = 'gene'
# Default color for genes outside every category
color_df$color = '#0768FA'
color_df$color[color_df$gene %in% ligands] = '#FFFB46'
color_df$color[color_df$gene %in% receptors] = '#F26419'
color_df$color[color_df$gene %in% maturity] = '#450920'
color_df$color[color_df$gene %in% adhesion] = '#F42C04'
color_df$color[color_df$gene %in% immaturity] = '#0CF700'
color_df$color[color_df$gene %in% disease] = '#8FB356'
if (IMAGE_SPECIFIC){
title_name = clump_name
} else{
title_name = gene
}
#colors = hue_pal()(length(levels(Idents(jy_all))))
#colors = colors[which(levels(Idents(jy_all)) %in% df$ident)]
# colors[1]: NA fill, colors[2]: gradient low end, colors[3]: category color
colors = c('grey90', 'grey90', color_df$color[color_df$gene == gene])
df$x = as.numeric(df$x)
df$y = as.numeric(df$y)
df$clump = sbset_obj$clump
# Rounded expression labels (only used by the commented-out geom_label_repel)
df$ident_label = as.character(round(df$ident, digits = 2))
df %>%
#ggplot(aes(x = x, y = y, color = clump)) +
#geom_point(size = 8) + theme_classic() + ggtitle(clump_name) + coord_fixed(ratio = 1) + NoAxes() ## without cell type
#ggplot(aes(x = x, y = y, color = factor(ident, levels = levels(Idents(sobj))))) +
ggplot(aes(x = x, y = y, color = ident)) +
geom_point(size = pt_size) + theme_classic() + ggtitle(title_name) + coord_fixed(ratio = 1) + NoAxes() + #scale_color_manual(values = colors) +
geom_encircle(data = filter(df, clump != "NaN"), expand = 0.03,aes(group = clump) ) + scale_color_gradient(na.value = colors[1], low = colors[2], high = colors[3]) #+ geom_label_repel(data = filter(df, ident > 0), label = filter(df, ident > 0)[['ident_label']])
#+ scale_color_gradient(limits = c(0,11))
}
plot_clump_expr(jy_408, 'TC_9', 'GAD1')
# Spatial scatter of one image's cells, colored by clump membership, with
# each clump outlined.
#
# sobj:       Seurat object; NOTE(review): subsets via the GLOBAL df_408, so
#             this assumes sobj == jy_408 aligned 1:1 to df_408 rows — TODO
#             confirm.
# clump_name: image name matched against df_408$IMAGE.NAME.
# pt_size:    point size for cells.
#
# Fix: the original body referenced the free variables `gene` and `color_df`
# (copied from plot_clump_expr) that are undefined here, so the function
# errored unless those happened to exist globally; the values they produced
# (`expmat`, `colors`) were never used by the plot and are removed.
plot_clump_clust <- function(sobj, clump_name, pt_size = 8) {
  sbset_obj <- sobj[, df_408$IMAGE.NAME == clump_name]
  # 'H' reduction holds the per-image spatial coordinates
  xy <- Embeddings(sbset_obj, reduction = 'H')
  df <- as.data.frame(xy)
  colnames(df) <- c('x', 'y')
  df$x <- as.numeric(df$x)
  df$y <- as.numeric(df$y)
  df$clump <- sbset_obj$clump
  df %>%
    ggplot(aes(x = x, y = y, color = clump)) +
    geom_point(size = pt_size) + theme_classic() + ggtitle(clump_name) +
    coord_fixed(ratio = 1) + NoAxes() + ## without cell type
    # Outline only the clumped cells (clump metadata != literal "NaN")
    geom_encircle(data = filter(df, clump != "NaN"), expand = 0.03, aes(group = clump))
}
plot_clump_clust(jy_408, 'TC_11')
#jy_164$image = jy_all$image[1:ncol(jy_164)]
# NOTE(review): plot_clump_celltype() is defined elsewhere in this session;
# this call produced the "Insufficient values in manual scale" error captured
# in the transcript that follows in the original file.
plot_clump_celltype(jy_408, 'TC_3', pt_size = 3)
# [pasted console output — not R code; commented out so the file can source]
# Error in `f()`:
# ! Insufficient values in manual scale. 5 needed but only 0 provided.
# Backtrace:
# 1. base (local) `<fn>`(x)
# 2. ggplot2:::print.ggplot(x)
# 4. ggplot2:::ggplot_build.ggplot(x)
# 5. base::lapply(data, scales_map_df, scales = npscales)
# 6. ggplot2 (local) FUN(X[[i]], ...)
# ...
# 13. ggplot2 (local) FUN(X[[i]], ...)
# 14. self$map(df[[j]])
# 15. ggplot2 (local) f(..., self = self)
# 16. self$palette(n)
# 17. ggplot2 (local) f(...)
# Reset the graphics state after the failed plot; errors if only the null
# device is open, as the transcript records.
dev.off()
# [pasted console output] Error in dev.off() : cannot shut down device 1 (the null device)
## Overlay the clump/cell-type plot on the DAPI image for 408 TC_3
dapi_img <- image_read('/home/aunoy/st/arc_profiling/st_analysis/hand_annotated_data/test/408_tc3.tif')
myplot <- plot_clump_celltype(jy_408, 'TC_3', pt_size = 5)
ggdraw() +
  draw_image(dapi_img) +
  draw_plot(myplot, scale = 1.14)
normed <- GetAssayData(jy_408, slot = "data")
## One COUPTF2 expression panel per 408 image. Hoist the unique() call —
## the original computed unique(df_408$IMAGE.NAME) twice per build and
## iterated 1:length(...) — and iterate the names directly.
image_names <- unique(df_408$IMAGE.NAME)
plots <- lapply(image_names, function(img) {
  plot_clump_expr(jy_408, img, 'COUPTF2', pt_size = 8, IMAGE_SPECIFIC = TRUE)
})
all_plots <- plot_grid(plotlist = plots, label_size = 1, nrow = 6)
all_plots
ggsave(filename = '408_COUPTF2_clump_profiles.pdf', path = file.path(output_dir_plot, '20220823_1'), all_plots,
       height = 18, width = 18)
## Ratio of GAD1-expressing to GAD1-negative cells; fetch the vector once
## instead of calling FetchData twice.
gad1_expr <- FetchData(jy_all, 'GAD1')
sum(gad1_expr != 0) / sum(gad1_expr == 0)
DimPlot(jy_all) + NoAxes()
FeaturePlot(jy_all, cells = which(Idents(jy_all) == 'VIP+GAD1+'), c('COUPTF2', 'SP8', 'PROX1', 'DLX2'), cols = c('lightgrey', hue_pal()(9)[5]))
FeaturePlot(jy_all, cells = which(Idents(jy_all) == 'VIP+GAD1+'), c('VIP', 'LHX6', 'NKX2.1', 'GAD1'), cols = c('lightgrey', hue_pal()(9)[5]))
FeaturePlot(jy_all, c('COUPTF2', 'SP8', 'PROX1', 'DLX2'), cols = c('lightgrey', hue_pal()(9)[5]))
FeaturePlot(jy_all, c('NKX2.1', 'LHX6', 'MAF1', 'GAD1'), cols = c('lightgrey', hue_pal()(9)[9]))
FeaturePlot(jy_all, c('TSHZ1', 'GSX2', 'GAD1'), cols = c('lightgrey', hue_pal()(9)[1]))
FeaturePlot(jy_all, 'DCX', cols = c('lightgrey', 'green')) + NoAxes() + NoLegend()
FeaturePlot(jy_all, c('LHX6', 'SST'), cols = c('lightgrey', hue_pal()(7)[6]), split.by = 'area', by.col = TRUE)
genes <- c('LHX6', 'SST')
# Iterate the gene names directly rather than 1:length(genes)
plots <- lapply(genes, function(g) {
  plot_features_umap(jy_all, g, pt.size = 0.8, alpha = 1, color = hue_pal()(7)[6])
})
umaps <- plot_grid(plotlist = plots, label_size = 10, nrow = 1)
umaps
DimPlot(jy_all, split.by = 'area', ncol = 3)
FeaturePlot(jy_all, c('TBR1', 'COUPTF2'), cols = c('#F18F01', '#048BA8'), blend= TRUE)
FeaturePlot(jy_all, c('VIP', 'SST'), cols = c( '#F18F01', '#048BA8'), blend= TRUE)
FeaturePlot(jy_all, c('TBR1', 'COUPTF2'), cols = c( '#F18F01', '#048BA8'), blend= TRUE)
DimPlot(jy_all, split.by = 'area', ncol = 3)
FeaturePlot(jy_all, features = c('GAD1', 'DLX2'), split.by = 'area', by.col = TRUE)
jy_all
## Cluster-stability sweep: re-cluster at resolutions 0.05..2.00 (step 0.05)
## and inspect with clustree. The unused `i = 1` counter from the original is
## dropped — the loop never read it.
breakpoints <- 1:40/20
res_str <- 'clust_tree_res.'
norm_str <- 'RNA_snn_res.'
for (breakpoint in breakpoints) {
  jy_all <- FindClusters(jy_all, resolution = breakpoint, verbose = FALSE)
  # Copy the assignments under a clustree-friendly prefix so clustree() can
  # discover the resolution columns by name
  jy_all[[paste0(res_str, breakpoint)]] <- jy_all[[paste0(norm_str, breakpoint)]]
}
library(clustree)
clustree(jy_all, prefix = res_str)
DotPlot(jy_all, features = all.genes) + RotatedAxis()
DotPlot(jy_all, features = all.genes, group.by = 'broad_areas') + RotatedAxis()
DimPlot(jy_all, split.by = 'broad_areas', ncol = 2)
DimPlot(jy_all, group.by = 'broad_areas') + NoAxes()
# Sanity check: raw pixel coordinates for case 164
XY_164 %>%
ggplot(aes(x = pixel_1, y = pixel_2)) + geom_point()